library(fpp2)
## Registered S3 method overwritten by 'quantmod':
## method from
## as.zoo.data.frame zoo
## ── Attaching packages ────────────────────────────────────────────── fpp2 2.5 ──
## ✔ ggplot2 3.5.1 ✔ fma 2.5
## ✔ forecast 8.23.0 ✔ expsmooth 2.3
##
library(fpp)
## Loading required package: lmtest
## Loading required package: zoo
##
## Attaching package: 'zoo'
## The following objects are masked from 'package:base':
##
## as.Date, as.Date.numeric
## Loading required package: tseries
##
## Attaching package: 'fpp'
## The following objects are masked from 'package:fpp2':
##
## ausair, ausbeer, austa, austourists, debitcards, departures,
## elecequip, euretail, guinearice, oil, sunspotarea, usmelec
library(readr)
library(readxl)
library(forecast)
library(ggplot2)
# Load the monthly U.S. total vehicle sales data (Jan 2019 - Feb 2024) from GitHub.
carsls <- read.csv("https://raw.githubusercontent.com/su-min-oh/Final-Exam/main/TOTALSA.csv")

# Convert the sales column into a monthly time series starting January 2019.
carsls_ts <- ts(carsls$Sales.Units.in.Millions., start = c(2019, 1), frequency = 12)
plot(carsls_ts)

# Keep only the post-Covid period (2022 onward) for the remaining analysis.
carsls_sub <- window(carsls_ts, start = 2022)
plot(carsls_sub)
The data is about the car sales from Jan 2019 to Feb 2024. The plot shows the drastic drop in 2020, and this is assumed to be due to outbreak of Covid. Since the graph has big changes, I cut the data from 2022 to do the further forecast. From the cut graph, I see there is a gradually increasing trend, but it needs further analysis to decide on that.
#Min, max, mean, median, 1st and 3rd Quartile values
# Five-number summary (plus mean) for the full series and the 2022+ subset.
summary(carsls_ts)
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 8.944 14.189 15.938 15.612 16.966 18.697
summary(carsls_sub)
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 13.29 14.19 15.47 15.18 15.95 16.45
#boxplot
# Boxplots to compare the spread and potential outliers of the two series.
boxplot(carsls_ts)
boxplot(carsls_sub)
# Classical decomposition of the 2022+ series into trend, seasonal, and
# random components; decompose() defaults to type = "additive".
carsls_decomp = decompose(carsls_sub)
carsls_decomp
## $x
## Jan Feb Mar Apr May Jun Jul Aug Sep Oct
## 2022 14.866 14.168 14.253 14.681 13.286 13.669 13.867 14.064 14.136 15.091
## 2023 15.665 15.434 15.668 16.446 15.948 16.428 16.302 15.909 16.193 15.799
## 2024 15.513 16.191
## Nov Dec
## 2022 14.733 13.935
## 2023 15.945 16.386
## 2024
##
## $seasonal
## Jan Feb Mar Apr May Jun
## 2022 0.37197569 -0.03735764 0.03405903 0.69685069 0.11885069 0.44622569
## 2023 0.37197569 -0.03735764 0.03405903 0.69685069 0.11885069 0.44622569
## 2024 0.37197569 -0.03735764
## Jul Aug Sep Oct Nov Dec
## 2022 -0.12229514 -0.27592014 -0.39777431 0.42472569 -0.11773264 -1.14160764
## 2023 -0.12229514 -0.27592014 -0.39777431 0.42472569 -0.11773264 -1.14160764
## 2024
##
## $trend
## Jan Feb Mar Apr May Jun Jul Aug
## 2022 NA NA NA NA NA NA 14.26237 14.34842
## 2023 15.21937 15.39771 15.56029 15.67550 15.75550 15.90812 16.00392 16.02913
## 2024 NA NA
## Sep Oct Nov Dec
## 2022 14.46012 14.59262 14.77708 15.00296
## 2023 NA NA NA NA
## 2024
##
## $random
## Jan Feb Mar Apr May
## 2022 NA NA NA NA NA
## 2023 0.073649306 0.073649306 0.073649306 0.073649306 0.073649306
## 2024 NA NA
## Jun Jul Aug Sep Oct
## 2022 NA -0.273079861 -0.008496528 0.073649306 0.073649306
## 2023 0.073649306 0.420378472 0.155795139 NA NA
## 2024
## Nov Dec
## 2022 0.073649306 0.073649306
## 2023 NA NA
## 2024
##
## $figure
## [1] 0.37197569 -0.03735764 0.03405903 0.69685069 0.11885069 0.44622569
## [7] -0.12229514 -0.27592014 -0.39777431 0.42472569 -0.11773264 -1.14160764
##
## $type
## [1] "additive"
##
## attr(,"class")
## [1] "decomposed.ts"
plot(carsls_decomp)
Is the time series seasonal? : From the decomposition plot, I could find some seasonality, but it’s not very obvious.
The decomposition is “Additive”. Note that the decompose() function does not choose the type automatically — it defaults to additive unless type = "multiplicative" is requested. The additive form is appropriate here because the seasonal swings and the volatility of the data look roughly constant over time.
Seasonal indices : can be found in above, in $figure.
The lowest month : December / The highest month : April. To speculate about the reason behind these indices: since this is car-sales data, in April the weather starts to get warmer, so many people may plan trips and naturally think of replacing or buying a car. In December, on the other hand, it gets colder and the holiday season begins; people may prefer to stay home with their families.
#Plot for time series adjusted for seasonality
# Remove the seasonal component estimated by decompose(); the red line is the
# seasonally adjusted series overlaid on the original.
carsls_seasadj = seasadj(carsls_decomp)
plot(carsls_sub)
lines(carsls_seasadj,col="red")
#Output
# Naive forecast: every future value equals the last observed value.
# (h defaults to 10 here; a 12-month forecast is produced separately below.)
carsls_naive = naive(carsls_sub)
#Residual Analysis
naive_res = residuals(carsls_naive)
#Plot of residuals
plot(naive_res)
abline(h=0, col="red")
#Histogram of Residuals
hist(naive_res)
#Plot of fitted vs. residuals
naive_fit = fitted(carsls_naive)
# NOTE(review): naive_fit / naive_res are ts objects, which triggers the
# "Don't know how to automatically pick scale" messages below; wrapping them
# in as.numeric() would silence those messages. The removed row is the first
# observation, whose naive fitted value is NA.
ggplot(data = data.frame(naive_fit, naive_res), aes(x = naive_fit, y = naive_res)) +
geom_point(shape = 1) + geom_hline(yintercept = 0,color="red")
## Don't know how to automatically pick scale for object of type <ts>. Defaulting
## to continuous.
## Don't know how to automatically pick scale for object of type <ts>. Defaulting
## to continuous.
## Warning: Removed 1 row containing missing values or values outside the scale range
## (`geom_point()`).
#Plot of actual vs. residuals
plot(as.numeric(carsls_sub),naive_res, xlab="actual",ylab="residuals")
abline(h=0,col="red")
#Acf
# NOTE(review): Acf() already draws the correlogram; wrapping it in plot()
# renders the same plot a second time.
plot(Acf(naive_res))
#Measures of accuracy
accuracy(carsls_naive)
## ME RMSE MAE MPE MAPE MASE ACF1
## Training set 0.053 0.6498938 0.51412 0.2460432 3.411387 0.3230122 -0.4356982
#Forecast for next year (table, plot)
# 12-month naive forecast: constant point forecast with widening intervals.
naive_fc = naive(carsls_sub,h=12)
naive_fc
## Point Forecast Lo 80 Hi 80 Lo 95 Hi 95
## Mar 2024 16.191 15.35813 17.02387 14.91723 17.46477
## Apr 2024 16.191 15.01314 17.36886 14.38962 17.99238
## May 2024 16.191 14.74842 17.63358 13.98477 18.39723
## Jun 2024 16.191 14.52526 17.85674 13.64346 18.73854
## Jul 2024 16.191 14.32864 18.05336 13.34277 19.03923
## Aug 2024 16.191 14.15089 18.23111 13.07092 19.31108
## Sep 2024 16.191 13.98743 18.39457 12.82093 19.56107
## Oct 2024 16.191 13.83528 18.54672 12.58824 19.79376
## Nov 2024 16.191 13.69238 18.68962 12.36969 20.01231
## Dec 2024 16.191 13.55723 18.82477 12.16299 20.21901
## Jan 2025 16.191 13.42867 18.95333 11.96639 20.41561
## Feb 2025 16.191 13.30585 19.07615 11.77854 20.60346
plot(naive_fc)
How good is the accuracy? : For now, it’s hard to judge its accuracy since I haven’t yet computed accuracy measures for the other forecast models. Judging from the MAPE alone, the mean absolute percentage error is only about 3.4%, which is quite low, so this method may be reasonably accurate. But it still needs to be compared with other models later on.
Predicted value : It predicts the value for next 12months for 16.191, uniformly, as it predicts based on the data from the last period.
# Centered moving averages of order 3, 6, and 9; a higher order gives a
# smoother line (red = MA3, blue = MA6, green = MA9).
MA3 = ma(carsls_sub, order=3)
MA6 = ma(carsls_sub, order=6)
MA9 = ma(carsls_sub, order=9)
plot(carsls_sub)
lines(MA3,col="red")
lines(MA6,col="blue")
lines(MA9,col="green")
#Forecast for the next 12 months using the best one(bonus)
# forecast() fits an ETS model to each smoothed series; the warnings below
# appear because a centered MA has NA values at both ends of the sample.
MA3_fc = forecast(MA3, h=12)
## Warning in ets(object, lambda = lambda, biasadj = biasadj,
## allow.multiplicative.trend = allow.multiplicative.trend, : Missing values
## encountered. Using longest contiguous portion of time series
MA6_fc = forecast(MA6, h=12)
## Warning in ets(object, lambda = lambda, biasadj = biasadj,
## allow.multiplicative.trend = allow.multiplicative.trend, : Missing values
## encountered. Using longest contiguous portion of time series
MA9_fc = forecast(MA9,h=12)
## Warning in ets(object, lambda = lambda, biasadj = biasadj,
## allow.multiplicative.trend = allow.multiplicative.trend, : Missing values
## encountered. Using longest contiguous portion of time series
# NOTE(review): these accuracy measures are computed against the *smoothed*
# series, not the original data, so they are not directly comparable with the
# training accuracy of models fitted to carsls_sub itself.
accuracy(MA3_fc)
## ME RMSE MAE MPE MAPE MASE
## Training set 0.06672768 0.2195715 0.1743123 0.4275958 1.167909 0.1016876
## ACF1
## Training set 0.4996581
accuracy(MA6_fc)
## ME RMSE MAE MPE MAPE MASE
## Training set 0.002340272 0.06812097 0.05171541 0.02845714 0.3385603 0.02762252
## ACF1
## Training set 0.08423552
accuracy(MA9_fc)
## ME RMSE MAE MPE MAPE MASE
## Training set -0.003493074 0.09149204 0.07412256 -0.01203334 0.4849304 0.0405468
## ACF1
## Training set -0.0120887
#I chose MA6
plot(MA6_fc)
Observations of plots as moving average goes up : In the plot, as moving average goes up, the line becomes smoother. If you see the green line, it’s the smoothest one. As the period we reflect becomes larger, short term noise is mitigated and long term trend tends to be shown. Also, as the order of moving average goes up, reflection to the latest data becomes slower. So it can show the long term trend of the data better.
(bonus) : I used order 6 to forecast for next 12 month, since it had the lowest MAPE.
#Simple smoothing for next 12 months
# Simple exponential smoothing = ETS(A,N,N): additive errors, no trend,
# no seasonality. Forecast 12 months ahead and print the model summary.
carsls_ets = ets(carsls_sub,model="ANN")
carsls_ets_fc = forecast(carsls_ets,h=12)
summary(carsls_ets_fc)
##
## Forecast method: ETS(A,N,N)
##
## Model Information:
## ETS(A,N,N)
##
## Call:
## ets(y = carsls_sub, model = "ANN")
##
## Smoothing parameters:
## alpha = 0.5558
##
## Initial states:
## l = 14.5659
##
## sigma: 0.5893
##
## AIC AICc BIC
## 61.13388 62.22479 64.90817
##
## Error measures:
## ME RMSE MAE MPE MAPE MASE
## Training set 0.1008798 0.566215 0.4348502 0.5553676 2.889801 0.2732084
## ACF1
## Training set -0.1227497
##
## Forecasts:
## Point Forecast Lo 80 Hi 80 Lo 95 Hi 95
## Mar 2024 16.02361 15.26835 16.77887 14.86854 17.17869
## Apr 2024 16.02361 15.15954 16.88769 14.70212 17.34510
## May 2024 16.02361 15.06297 16.98425 14.55444 17.49278
## Jun 2024 16.02361 14.97527 17.07196 14.42030 17.62692
## Jul 2024 16.02361 14.89435 17.15287 14.29655 17.75067
## Aug 2024 16.02361 14.81886 17.22837 14.18110 17.86612
## Sep 2024 16.02361 14.74782 17.29940 14.07246 17.97476
## Oct 2024 16.02361 14.68054 17.36668 13.96956 18.07766
## Nov 2024 16.02361 14.61647 17.43075 13.87158 18.17564
## Dec 2024 16.02361 14.55520 17.49203 13.77786 18.26936
## Jan 2025 16.02361 14.49638 17.55085 13.68791 18.35931
## Feb 2025 16.02361 14.43974 17.60748 13.60129 18.44593
I used ets model and set the model to “ANN”, which stands for additive error, no trend, no seasonality. (Simple smoothing!)
alpha is 0.5558, which is in the middle of 0 and 1. This model makes predictions by balancing recent and historical data.
The value of initial state is 14.5659, and it represents the initial value when the data starts prediction.
The value of sigma is 0.5893. It stands for the standard deviation of the residuals; the smaller the value, the more accurate the fit. A value of 0.5893 seems small considering the scale of the values in the data.
#Residual Analysis
# Residual diagnostics for the simple exponential smoothing model.
carsls_ets_res = residuals(carsls_ets_fc)
#Plot of residuals
plot(carsls_ets_res)
abline(h=0,col="red")
#Histogram
hist(carsls_ets_res)
#Plot of fitted vs. residuals
carsls_ets_fitted = fitted(carsls_ets_fc)
# NOTE(review): the fitted/residual ts objects trigger the scale messages
# below; wrapping them in as.numeric() would silence them.
ggplot(data = data.frame(fitted = carsls_ets_fitted, residuals = carsls_ets_res),
aes(x = fitted, y = residuals)) + geom_point() + geom_hline(yintercept = 0, color="red")
## Don't know how to automatically pick scale for object of type <ts>. Defaulting
## to continuous.
## Don't know how to automatically pick scale for object of type <ts>. Defaulting
## to continuous.
#Plot of actual vs. residuals
plot(as.numeric(carsls_sub),carsls_ets_res, xlab="actual",ylab="residuals")
abline(h=0,col="red")
#Acf
# NOTE(review): Acf() already draws the correlogram; plot() duplicates it.
plot(Acf(carsls_ets_res))
#Measures of accuracy
accuracy(carsls_ets_fc)
## ME RMSE MAE MPE MAPE MASE
## Training set 0.1008798 0.566215 0.4348502 0.5553676 2.889801 0.2732084
## ACF1
## Training set -0.1227497
#Forecast for next year (table, plot)
# ETS(A,N,N) gives a flat point forecast (the final level estimate).
carsls_ets_fc
## Point Forecast Lo 80 Hi 80 Lo 95 Hi 95
## Mar 2024 16.02361 15.26835 16.77887 14.86854 17.17869
## Apr 2024 16.02361 15.15954 16.88769 14.70212 17.34510
## May 2024 16.02361 15.06297 16.98425 14.55444 17.49278
## Jun 2024 16.02361 14.97527 17.07196 14.42030 17.62692
## Jul 2024 16.02361 14.89435 17.15287 14.29655 17.75067
## Aug 2024 16.02361 14.81886 17.22837 14.18110 17.86612
## Sep 2024 16.02361 14.74782 17.29940 14.07246 17.97476
## Oct 2024 16.02361 14.68054 17.36668 13.96956 18.07766
## Nov 2024 16.02361 14.61647 17.43075 13.87158 18.17564
## Dec 2024 16.02361 14.55520 17.49203 13.77786 18.26936
## Jan 2025 16.02361 14.49638 17.55085 13.68791 18.35931
## Feb 2025 16.02361 14.43974 17.60748 13.60129 18.44593
plot(carsls_ets_fc)
How good is the accuracy? : Considering the MAPE of naive was 3.41, the accuracy of this simple smoothing method is better than Naive. Also, the root mean squared error is about 0.57, which can be considered quite low.
Predicted value : By checking the table, it predicts the value of the time series to be 16.02 for the whole forecast period. The plot is similar to Naive method since it also shows the same value for the given period. It is because this model forecasts only based on the “level” of the data.
#forecast for next 12 months
# Holt-Winters additive method: models level, trend, and seasonality.
carsls_hw = hw(carsls_sub,h=12)
summary(carsls_hw)
##
## Forecast method: Holt-Winters' additive method
##
## Model Information:
## Holt-Winters' additive method
##
## Call:
## hw(y = carsls_sub, h = 12)
##
## Smoothing parameters:
## alpha = 2e-04
## beta = 2e-04
## gamma = 0.9997
##
## Initial states:
## l = 13.424
## b = 0.1305
## s = -0.9951 -0.2147 0.5375 -0.7997 -0.2747 -0.8722
## 0.2528 -1.0152 0.5749 0.2429 0.6483 1.9153
##
## sigma: 1.0571
##
## AIC AICc BIC
## 96.75263 173.25263 118.14027
##
## Error measures:
## ME RMSE MAE MPE MAPE MASE
## Training set -0.01760396 0.655559 0.5118556 -0.1586194 3.307403 0.3215895
## ACF1
## Training set 0.1938631
##
## Forecasts:
## Point Forecast Lo 80 Hi 80 Lo 95 Hi 95
## Mar 2024 17.23629 15.88162 18.59096 15.16450 19.30809
## Apr 2024 18.01447 16.65980 19.36914 15.94268 20.08626
## May 2024 17.51639 16.16172 18.87107 15.44460 19.58819
## Jun 2024 17.99612 16.64145 19.35080 15.92433 20.06792
## Jul 2024 17.86972 16.51505 19.22440 15.79792 19.94152
## Aug 2024 17.47630 16.12162 18.83097 15.40450 19.54809
## Sep 2024 17.75974 16.40506 19.11441 15.68794 19.83154
## Oct 2024 17.36545 16.01077 18.72013 15.29365 19.43725
## Nov 2024 17.51113 16.15645 18.86581 15.43932 19.58293
## Dec 2024 17.95157 16.59689 19.30626 15.87976 20.02338
## Jan 2025 17.07859 15.72390 18.43328 15.00678 19.15040
## Feb 2025 17.75657 16.40188 19.11126 15.68475 19.82839
Value of alpha(level): 2e-04. This very small value means the level is updated only slightly by each new observation, so the estimate effectively averages over the entire history rather than tracking the most recent data.
Value of beta(trend): 2e-04, which is similar to alpha. This model does not reflect drastic trend changes, but rather seeks more stable trend changes.
Value of gamma(seasonality) : 0.9997. The model puts weight to current seasonal changes than past seasonal changes.
Initial state : level=13.4 it shows the initial level of data from the starting point of the forecast / trend=0.13 In a model with a trend, this initial trend value is used to start the prediction. Here, it’s not significant / seasonality= See above. Indicates the initial value corresponding to each season(month).
Value of sigma(standard deviations of error) : 1.0571. Small sigma value means that the model fits the data better, while a large sigma value means that there is greater variation between the predicted and actual values.
#Residual Analysis
# Residual diagnostics for the Holt-Winters additive model.
carsls_hw_res = residuals(carsls_hw)
#Plot
plot(carsls_hw_res)
abline(h=0,col="red")
#Histogram
hist(carsls_hw_res)
#Plot of fitted vs. residuals
carsls_hw_fitted = fitted(carsls_hw)
# NOTE(review): the fitted/residual ts objects trigger the scale messages
# below; wrapping them in as.numeric() would silence them.
ggplot(data = data.frame(fitted = carsls_hw_fitted, residuals = carsls_hw_res),
aes(x = fitted, y = residuals)) + geom_point() + geom_hline(yintercept = 0, color="red")
## Don't know how to automatically pick scale for object of type <ts>. Defaulting
## to continuous.
## Don't know how to automatically pick scale for object of type <ts>. Defaulting
## to continuous.
#Plot of actual vs. residuals
plot(as.numeric(carsls_sub),carsls_hw_res, xlab="actual",ylab="residuals")
abline(h=0,col="red")
#Acf
# NOTE(review): Acf() already draws the correlogram; plot() duplicates it.
plot(Acf(carsls_hw_res))
#Measures of accuracy
accuracy(carsls_hw)
## ME RMSE MAE MPE MAPE MASE
## Training set -0.01760396 0.655559 0.5118556 -0.1586194 3.307403 0.3215895
## ACF1
## Training set 0.1938631
#Forecast for next year (table, plot)
# Unlike naive/SES, the seasonal component makes point forecasts vary by month.
carsls_hw
## Point Forecast Lo 80 Hi 80 Lo 95 Hi 95
## Mar 2024 17.23629 15.88162 18.59096 15.16450 19.30809
## Apr 2024 18.01447 16.65980 19.36914 15.94268 20.08626
## May 2024 17.51639 16.16172 18.87107 15.44460 19.58819
## Jun 2024 17.99612 16.64145 19.35080 15.92433 20.06792
## Jul 2024 17.86972 16.51505 19.22440 15.79792 19.94152
## Aug 2024 17.47630 16.12162 18.83097 15.40450 19.54809
## Sep 2024 17.75974 16.40506 19.11441 15.68794 19.83154
## Oct 2024 17.36545 16.01077 18.72013 15.29365 19.43725
## Nov 2024 17.51113 16.15645 18.86581 15.43932 19.58293
## Dec 2024 17.95157 16.59689 19.30626 15.87976 20.02338
## Jan 2025 17.07859 15.72390 18.43328 15.00678 19.15040
## Feb 2025 17.75657 16.40188 19.11126 15.68475 19.82839
plot(carsls_hw)
How good is the accuracy? : MAPE is smaller than Naive, but larger than simple smoothing. RMSE is also larger than simple smoothing.
Predicted time series value : can be checked in the above table. Unlike Naive or simple smoothing, it has various different values for the given period.
# Is Time Series data Stationary? How did you verify?
# How many differences are needed to make it stationary?
# Is Seasonality component needed?
# tsdisplay() shows the series with its ACF and PACF side by side.
tsdisplay(carsls_sub)
# Unit-root tests: 1 regular difference needed, 0 seasonal differences.
ndiffs(carsls_sub)
## [1] 1
nsdiffs(carsls_sub)
## [1] 0
Acf(carsls_sub)
# Plot the Time Series chart of the differenced series.
# Plot the ACF and PACF plots of the differenced series.
# Based on the ACF and PACF, which are the possible ARIMA models?
carslsdiff = diff(carsls_sub,1)
tsdisplay(carslsdiff)
# Candidate ARIMA(p,1,1) models; compare AIC / BIC / sigma^2 in a small table.
arima1 = Arima(carsls_sub, order = c(1, 1, 1))
arima2 = Arima(carsls_sub, order = c(2, 1, 1))
arima3 =Arima(carsls_sub, order = c(3, 1, 1))
# NOTE(review): reusing `c` as a variable name here still works (R skips
# non-function bindings when resolving the call c(...)), but the shadowing
# is confusing; a descriptive name would be safer.
a=c(arima1$aic,arima1$bic,arima1$sigma2)
b=c(arima2$aic,arima2$bic,arima2$sigma2)
c=c(arima3$aic,arima3$bic,arima3$sigma2)
arimas = rbind(a,b,c)
rownames(arimas) = c("(1,1,1)","(2,1,1)","(3,1,1)")
colnames(arimas) = c("aic","bic","sigma")
arimas
## aic bic sigma
## (1,1,1) 49.03047 52.68710 0.3518404
## (2,1,1) 43.92843 48.80394 0.2425013
## (3,1,1) 46.74860 52.84297 0.2907997
# Exhaustive (non-stepwise) automatic model search; trace prints each candidate.
auto.arima(carsls_sub,trace=TRUE, stepwise = FALSE)
##
## ARIMA(0,1,0) : 51.57352
## ARIMA(0,1,0) with drift : 53.77824
## ARIMA(0,1,1) : 48.24721
## ARIMA(0,1,1) with drift : 49.88389
## ARIMA(0,1,2) : 48.51251
## ARIMA(0,1,2) with drift : 50.95166
## ARIMA(0,1,3) : 50.25856
## ARIMA(0,1,3) with drift : 53.11982
## ARIMA(0,1,4) : Inf
## ARIMA(0,1,4) with drift : Inf
## ARIMA(0,1,5) : Inf
## ARIMA(0,1,5) with drift : Inf
## ARIMA(1,1,0) : 48.62806
## ARIMA(1,1,0) with drift : 50.76726
## ARIMA(1,1,1) : 50.17333
## ARIMA(1,1,1) with drift : 52.19397
## ARIMA(1,1,2) : 51.28083
## ARIMA(1,1,2) with drift : 54.071
## ARIMA(1,1,3) : Inf
## ARIMA(1,1,3) with drift : Inf
## ARIMA(1,1,4) : Inf
## ARIMA(1,1,4) with drift : Inf
## ARIMA(2,1,0) : 48.20508
## ARIMA(2,1,0) with drift : 49.80281
## ARIMA(2,1,1) : Inf
## ARIMA(2,1,1) with drift : Inf
## ARIMA(2,1,2) : 49.31717
## ARIMA(2,1,2) with drift : 52.19999
## ARIMA(2,1,3) : Inf
## ARIMA(2,1,3) with drift : Inf
## ARIMA(3,1,0) : 46.77066
## ARIMA(3,1,0) with drift : 49.42144
## ARIMA(3,1,1) : 49.90649
## ARIMA(3,1,1) with drift : 52.80319
## ARIMA(3,1,2) : Inf
## ARIMA(3,1,2) with drift : Inf
## ARIMA(4,1,0) : 49.90939
## ARIMA(4,1,0) with drift : 52.80723
## ARIMA(4,1,1) : 53.4198
## ARIMA(4,1,1) with drift : Inf
## ARIMA(5,1,0) : 53.41251
## ARIMA(5,1,0) with drift : 56.69595
##
##
##
## Best model: ARIMA(3,1,0)
## Series: carsls_sub
## ARIMA(3,1,0)
##
## Coefficients:
## ar1 ar2 ar3
## -0.4560 -0.1214 0.4010
## s.e. 0.1811 0.1998 0.1803
##
## sigma^2 = 0.278: log likelihood = -18.39
## AIC=44.77 AICc=46.77 BIC=49.65
#Residual Analysis
# NOTE(review): this re-runs the exact same exhaustive search as above because
# the earlier auto.arima() result was not assigned; assigning it the first
# time would avoid the duplicated computation.
carsls_arima = auto.arima(carsls_sub,trace=TRUE, stepwise = FALSE)
##
## ARIMA(0,1,0) : 51.57352
## ARIMA(0,1,0) with drift : 53.77824
## ARIMA(0,1,1) : 48.24721
## ARIMA(0,1,1) with drift : 49.88389
## ARIMA(0,1,2) : 48.51251
## ARIMA(0,1,2) with drift : 50.95166
## ARIMA(0,1,3) : 50.25856
## ARIMA(0,1,3) with drift : 53.11982
## ARIMA(0,1,4) : Inf
## ARIMA(0,1,4) with drift : Inf
## ARIMA(0,1,5) : Inf
## ARIMA(0,1,5) with drift : Inf
## ARIMA(1,1,0) : 48.62806
## ARIMA(1,1,0) with drift : 50.76726
## ARIMA(1,1,1) : 50.17333
## ARIMA(1,1,1) with drift : 52.19397
## ARIMA(1,1,2) : 51.28083
## ARIMA(1,1,2) with drift : 54.071
## ARIMA(1,1,3) : Inf
## ARIMA(1,1,3) with drift : Inf
## ARIMA(1,1,4) : Inf
## ARIMA(1,1,4) with drift : Inf
## ARIMA(2,1,0) : 48.20508
## ARIMA(2,1,0) with drift : 49.80281
## ARIMA(2,1,1) : Inf
## ARIMA(2,1,1) with drift : Inf
## ARIMA(2,1,2) : 49.31717
## ARIMA(2,1,2) with drift : 52.19999
## ARIMA(2,1,3) : Inf
## ARIMA(2,1,3) with drift : Inf
## ARIMA(3,1,0) : 46.77066
## ARIMA(3,1,0) with drift : 49.42144
## ARIMA(3,1,1) : 49.90649
## ARIMA(3,1,1) with drift : 52.80319
## ARIMA(3,1,2) : Inf
## ARIMA(3,1,2) with drift : Inf
## ARIMA(4,1,0) : 49.90939
## ARIMA(4,1,0) with drift : 52.80723
## ARIMA(4,1,1) : 53.4198
## ARIMA(4,1,1) with drift : Inf
## ARIMA(5,1,0) : 53.41251
## ARIMA(5,1,0) with drift : 56.69595
##
##
##
## Best model: ARIMA(3,1,0)
# Residual diagnostics for the selected ARIMA(3,1,0) model.
carsls_arima_res = carsls_arima$residuals
#plot
plot(carsls_arima_res)
abline(h=0,col="red")
#Histogram
hist(carsls_arima_res)
#Plot of fitted vs. residuals
carsls_arima_fitted = fitted(carsls_arima)
# NOTE(review): the fitted/residual ts objects trigger the scale messages
# below; wrapping them in as.numeric() would silence them.
ggplot(data = data.frame(fitted = carsls_arima_fitted, residuals = carsls_arima_res),
aes(x = fitted, y = residuals)) + geom_point() + geom_hline(yintercept = 0, color="red")
## Don't know how to automatically pick scale for object of type <ts>. Defaulting
## to continuous.
## Don't know how to automatically pick scale for object of type <ts>. Defaulting
## to continuous.
#Plot of actual vs. residuals
plot(as.numeric(carsls_sub),carsls_arima_res, xlab="actual",ylab="residuals")
abline(h=0,col="red")
#Acf
# NOTE(review): Acf() already draws the correlogram; plot() duplicates it.
plot(Acf(carsls_arima_res))
#Measures of accuracy
accuracy(carsls_arima)
## ME RMSE MAE MPE MAPE MASE
## Training set 0.05453538 0.4850027 0.369875 0.2882758 2.481099 0.2323857
## ACF1
## Training set 0.01221806
#Forecast for next 1 & 2 year
# 12- and 24-month forecasts from the same fitted model.
carsls_arima_fc_1Y = forecast(carsls_arima,h=12)
carsls_arima_fc_2Y = forecast(carsls_arima,h=24)
#Table and plot
# 1Y
carsls_arima_fc_1Y
## Point Forecast Lo 80 Hi 80 Lo 95 Hi 95
## Mar 2024 16.16459 15.48888 16.84029 15.13119 17.19799
## Apr 2024 15.74432 14.97511 16.51353 14.56792 16.92072
## May 2024 16.21102 15.33169 17.09035 14.86619 17.55584
## Jun 2024 16.03861 14.90985 17.16737 14.31233 17.76489
## Jul 2024 15.89209 14.67744 17.10673 14.03444 17.74973
## Aug 2024 16.16695 14.83107 17.50283 14.12390 18.21000
## Sep 2024 15.99026 14.50401 17.47652 13.71723 18.26329
## Oct 2024 15.97873 14.41346 17.54400 13.58486 18.37260
## Nov 2024 16.11564 14.44085 17.79042 13.55428 18.67700
## Dec 2024 15.98376 14.20283 17.76470 13.26006 18.70747
## Jan 2025 16.02266 14.16621 17.87911 13.18346 18.86186
## Feb 2025 16.07582 14.12419 18.02745 13.09106 19.06058
# 12-month ARIMA forecast with 80% / 95% prediction intervals.
plot(carsls_arima_fc_1Y)
# 2Y
carsls_arima_fc_2Y
## Point Forecast Lo 80 Hi 80 Lo 95 Hi 95
## Mar 2024 16.16459 15.48888 16.84029 15.13119 17.19799
## Apr 2024 15.74432 14.97511 16.51353 14.56792 16.92072
## May 2024 16.21102 15.33169 17.09035 14.86619 17.55584
## Jun 2024 16.03861 14.90985 17.16737 14.31233 17.76489
## Jul 2024 15.89209 14.67744 17.10673 14.03444 17.74973
## Aug 2024 16.16695 14.83107 17.50283 14.12390 18.21000
## Sep 2024 15.99026 14.50401 17.47652 13.71723 18.26329
## Oct 2024 15.97873 14.41346 17.54400 13.58486 18.37260
## Nov 2024 16.11564 14.44085 17.79042 13.55428 18.67700
## Dec 2024 15.98376 14.20283 17.76470 13.26006 18.70747
## Jan 2025 16.02266 14.16621 17.87911 13.18346 18.86186
## Feb 2025 16.07582 14.12419 18.02745 13.09106 19.06058
## Mar 2025 15.99398 13.95788 18.03009 12.88003 19.10794
## Apr 2025 16.04045 13.93186 18.14903 12.81564 19.26525
## May 2025 16.05050 13.85911 18.24190 12.69906 19.40195
## Jun 2025 16.00746 13.74321 18.27172 12.54458 19.47035
## Jul 2025 16.04450 13.71125 18.37775 12.47610 19.61291
## Aug 2025 16.03687 13.63043 18.44331 12.35653 19.71720
## Sep 2025 16.01860 13.54621 18.49099 12.23740 19.79979
## Oct 2025 16.04270 13.50515 18.58026 12.16185 19.92356
## Nov 2025 16.03087 13.42733 18.63441 12.04909 20.01264
## Dec 2025 16.02601 13.36119 18.69083 11.95052 20.10151
## Jan 2026 16.03933 13.31318 18.76548 11.87004 20.20862
## Feb 2026 16.02910 13.24234 18.81587 11.76711 20.29109
plot(carsls_arima_fc_2Y)
How good is the accuracy? : MAPE is 2.48, which is smaller than Naive,HW, and simple smoothing. But still bigger than simple moving average.
Predicted time series value : can be checked in the above table & plot. It has various different values for the given period. From the 2Y plot, I could see the values become smoother as time goes.
# Collect the training-set accuracy measures of every fitted model and stack
# them into a single comparison table, one row per method.
acc_naive <- accuracy(carsls_naive)
acc_ma6 <- accuracy(MA6_fc)
acc_ses <- accuracy(carsls_ets_fc)
acc_hw <- accuracy(carsls_hw)
acc_arima <- accuracy(carsls_arima)
accuracymeasure <- rbind(acc_naive, acc_ma6, acc_ses, acc_hw, acc_arima)
rownames(accuracymeasure) <- c("naive", "MA6", "Simple smoothing", "HW", "ARIMA")
accuracymeasure
## ME RMSE MAE MPE MAPE
## naive 0.053000000 0.64989381 0.51412000 0.24604323 3.4113871
## MA6 0.002340272 0.06812097 0.05171541 0.02845714 0.3385603
## Simple smoothing 0.100879843 0.56621504 0.43485016 0.55536757 2.8898006
## HW -0.017603957 0.65555899 0.51185559 -0.15861944 3.3074032
## ARIMA 0.054535382 0.48500267 0.36987497 0.28827581 2.4810986
## MASE ACF1
## naive 0.32301216 -0.43569823
## MA6 0.02762252 0.08423552
## Simple smoothing 0.27320838 -0.12274966
## HW 0.32158947 0.19386313
## ARIMA 0.23238566 0.01221806
About the methods
Naive : This is the simplest prediction method, which uses the values of the previous time point as the predicted values for the next time point. It can be useful when the data does not have a particular trend or seasonality. It has the advantage of being able to make predictions quickly.
Moving Average : A method of forecasting using the average of data over a certain period of time. It is suitable when there is no trend and the data is relatively stable.
Simple smoothing : A method that reflects the level of the data. This model makes predictions based on recent data, but reflects only the level of the data without trends or seasonality, so useful when there is no clear trend or seasonality in the data.
Holt winters : A method that reflects level, trend, and seasonality. Suitable for time series data with clear trends and seasonality.
ARIMA : A method that combines AR(Auto regressive), I(Integrated), and MA(Moving Average). While it requires careful tuning, ARIMA provides accurate forecasts for a wide range of time series data.
Best and worst method for each accuracy measure.
ME (Mean Error) : This measures the overall direction of the forecast, so not suitable to evaluate the accuracy of the prediction. HW overestimates the data, and simple smoothing underestimates the value the most.
RMSE (Root mean squared error) : Best is Moving average and worst is Holt-Winters. RMSE is a good indicator of prediction accuracy because it is more sensitive to large errors. MA6 has the smallest RMSE and is evaluated as the most accurate model.
MAE(Mean Absolute Error) : This method gives equal importance to all errors, and the smaller the error, the more accurate it is. Best is Moving average and Worst is Naive method.
MPE(Mean Percentage Error) : This method allows us to determine the relative error. Moving average has the smallest magnitude, underestimating by only about 0.03%. Holt-Winters overestimates the value by about 0.16%.
MAPE(Mean Absolute Percentage Error) : This shows errors as absolute percentages. Moving average is the best, while Naive is the worst with the largest value.
MASE (Mean Absolute Scaled Error) : Best is Moving average, worst is Naive.
ACF1 : ARIMA is evaluated as the best fitting model with the lowest lag-1 correlation, while Naive has the highest correlation.
Overall, Moving Average is the model that outperforms all the others on the accuracy measures. Note, however, that its measures were computed against the smoothed MA(6) series rather than the original data, so this comparison flatters the Moving Average somewhat.
# 24-month forecast from the ETS model fitted to the MA(6)-smoothed series.
# NOTE(review): the forecast origin is Dec 2023 (see the table below),
# apparently because the centered moving average loses observations at the
# end of the sample — confirm before comparing horizons with other models.
MA6_fc_2y = forecast(MA6,h=24)
## Warning in ets(object, lambda = lambda, biasadj = biasadj,
## allow.multiplicative.trend = allow.multiplicative.trend, : Missing values
## encountered. Using longest contiguous portion of time series
MA6_fc_2y
## Point Forecast Lo 80 Hi 80 Lo 95 Hi 95
## Dec 2023 15.93874 15.837018 16.04046 15.783171 16.09431
## Jan 2024 15.89648 15.669163 16.12379 15.548829 16.24413
## Feb 2024 15.85422 15.474076 16.23436 15.272840 16.43560
## Mar 2024 15.81196 15.255828 16.36809 14.961429 16.66249
## Apr 2024 15.76970 15.017156 16.52225 14.618782 16.92062
## May 2024 15.72744 14.760039 16.69484 14.247927 17.20696
## Jun 2024 15.68518 14.485990 16.88437 13.851176 17.51919
## Jul 2024 15.64292 14.196210 17.08964 13.430367 17.85548
## Aug 2024 15.60066 13.891683 17.30964 12.987004 18.21432
## Sep 2024 15.55840 13.573232 17.54358 12.522346 18.59446
## Oct 2024 15.51615 13.241558 17.79073 12.037465 18.99483
## Nov 2024 15.47389 12.897265 18.05051 11.533285 19.41449
## Dec 2024 15.43163 12.540881 18.32237 11.010613 19.85264
## Jan 2025 15.38937 12.172870 18.60587 10.470158 20.30858
## Feb 2025 15.34711 11.793642 18.90058 9.912550 20.78167
## Mar 2025 15.30485 11.403564 19.20613 9.338348 21.27135
## Apr 2025 15.26259 11.002963 19.52222 8.748053 21.77713
## May 2025 15.22033 10.592135 19.84853 8.142116 22.29855
## Jun 2025 15.17807 10.171343 20.18480 7.520941 22.83520
## Jul 2025 15.13581 9.740827 20.53080 6.884894 23.38673
## Aug 2025 15.09355 9.300801 20.88630 6.234304 23.95280
## Sep 2025 15.05129 8.851461 21.25113 5.569468 24.53312
## Oct 2025 15.00903 8.392981 21.62509 4.890654 25.12741
## Nov 2025 14.96677 7.925518 22.00803 4.198102 25.73545
plot(MA6_fc_2y)
The MA method says that the value will be stable over the 2 years with a slight downward trend.
Rank : 1.MA / 2. ARIMA / 3. Simple Smoothing / 4. Holt-Winters / 5. Naive
Most of the methods showed better accuracy measure than Naive. So Naive is the last place.
Moving Average surpassed all the other methods in terms of every accuracy measures except for ACF1. So this is the number one rank.
Next is ARIMA, based on the accuracy measures.